{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "b8pGnmtm8w6b"
   },
   "source": [
    "# Combining object detection results\n",
    "This notebook:\n",
    "* Combines Rollright Stones results from fine-tuned detector, ground-truth labelling, and off-the-shelf inference.\n",
    "* Includes combined results in answering (RQ2) and (RQ3) for the top five sites."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "RltfqJ6K8uwi"
   },
   "source": [
    "## Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 34
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 1928,
     "status": "ok",
     "timestamp": 1560805925133,
     "user": {
      "displayName": "Tania Loke",
      "photoUrl": "",
      "userId": "13774328780814897200"
     },
     "user_tz": -60
    },
    "id": "NiJ8IPRM9sYH",
    "outputId": "ec702a5a-3412-43d9-f44d-47d735bfbef3"
   },
   "outputs": [],
   "source": [
    "import cv2\n",
    "import glob\n",
    "import networkx as nx\n",
    "import numpy as np\n",
    "import os\n",
    "import pandas as pd\n",
    "import pickle\n",
    "import seaborn as sns\n",
    "import six.moves.urllib as urllib\n",
    "import sys\n",
    "import tarfile\n",
    "import tensorflow as tf\n",
    "\n",
    "from collections import Counter, defaultdict, OrderedDict\n",
    "from datetime import datetime\n",
    "from io import StringIO\n",
    "from PIL import Image\n",
    "from scipy import stats"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib\n",
    "matplotlib.use('TkAgg')\n",
    "from matplotlib import pyplot as plt\n",
    "from matplotlib_venn import venn2, venn3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.image as mpimg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "nfvWzbeU91XS"
   },
   "outputs": [],
   "source": [
    "# Point to tensorflow/models/research folder from https://github.com/tensorflow/models/tree/master/research/object_detection\n",
    "tensorflow_folder = \"tensorflow/models/research\"\n",
    "CWD = os.getcwd()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "NyDPOqPn-K0A"
   },
   "source": [
    "## Object detection imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "BBKPxyYa99Q5"
   },
   "outputs": [],
   "source": [
    "sys.path.append(tensorflow_folder)\n",
    "\n",
    "from object_detection.utils import ops as utils_ops\n",
    "from object_detection.utils import label_map_util\n",
    "from object_detection.utils import visualization_utils as vis_util"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "1qWmOB8P-N9c"
   },
   "outputs": [],
   "source": [
    "# List of the strings that is used to add correct label for each box.\n",
    "PATH_TO_LABELS = os.path.join(tensorflow_folder, 'object_detection', 'data', 'oid_v4_label_map.pbtxt')\n",
    "NUM_CLASSES = 601\n",
    "PATH_TO_LABELS_FT = 'label_map.pbtxt'\n",
    "NUM_CLASSES_FT = 2\n",
    "\n",
    "# Size, in inches, of the output images.\n",
    "IMAGE_SIZE = (12, 8)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "9RwslqZo-ZUT"
   },
   "source": [
    "## Load label map"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 88
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 764,
     "status": "ok",
     "timestamp": 1560806293227,
     "user": {
      "displayName": "Tania Loke",
      "photoUrl": "",
      "userId": "13774328780814897200"
     },
     "user_tz": -60
    },
    "id": "0UHAJVif-eWk",
    "outputId": "c4d74506-92a6-4bc0-89e3-564d00c262f2"
   },
   "outputs": [],
   "source": [
    "label_map = label_map_util.load_labelmap(PATH_TO_LABELS)\n",
    "categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)\n",
    "category_index = label_map_util.create_category_index(categories)\n",
    "label_map_ft = label_map_util.load_labelmap(PATH_TO_LABELS_FT)\n",
    "categories_ft = label_map_util.convert_label_map_to_categories(label_map_ft, max_num_classes=NUM_CLASSES_FT, use_display_name=True)\n",
    "category_index_ft = label_map_util.create_category_index(categories_ft)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "CITgohF7-eqM"
   },
   "outputs": [],
   "source": [
    "def load_image_into_numpy_array(image):\n",
    "    (im_width, im_height) = image.size\n",
    "    image_as_data = np.array(image.getdata())[:,:3]\n",
    "    return image_as_data.reshape((im_height, im_width, 3)).astype(np.uint8)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load pickled output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "SITE_NAMES = ['CastleriggStoneCircle','RuffordAbbey','ReculverTowersandRomanFort','BuryStEdmundsAbbey','RollrightStones']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Each site's pickled detections live in '<prefix>_output_dicts_3978.pickle';\n",
    "# the prefixes are listed in the same order as SITE_NAMES.\n",
    "output_dicts = {}\n",
    "for site, prefix in zip(SITE_NAMES, ('castlerigg','rufford','reculver','bury','rollright')):\n",
    "    with open('{}_output_dicts_3978.pickle'.format(prefix), 'rb') as f:\n",
    "        output_dicts[site] = pickle.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "output_dfs = {}\n",
    "for site in SITE_NAMES:\n",
    "    df = pd.DataFrame.from_dict(output_dicts[site],orient='index')\n",
    "    df.index = [os.path.join('sample_{}'.format(site),\n",
    "                    os.path.basename(i)) for i in list(df.index)]\n",
    "    df = df.reset_index()\n",
    "    df = df.rename(columns={'index':'image_path'})\n",
    "    output_dfs[site] = df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Visualisation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def print_scores(output_dict, category_index):\n",
    "    scores = output_dict['detection_scores']\n",
    "    classes = output_dict['detection_classes']\n",
    "\n",
    "    for s, c in zip(scores,classes):\n",
    "        if s is None or s > 0.5:\n",
    "            class_name = category_index[c]['name']\n",
    "            print('{}:\\t{:.0%}'.format(class_name, s))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The row ``detection_boxes`` uses normalised coordinates for ``[ymin, xmin, ymax, xmax]`` (i.e. top, left, bottom, right, since image coordinates start at the top-left corner), which can be converted to pixel coordinates as follows:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def pixel_coordinates(normalised_coordinates, image_path):\n",
    "    img = Image.open(image_path)\n",
    "    ymin, xmin, ymax, xmax = normalised_coordinates\n",
    "    width, height = img.size\n",
    "    ymin = ymin*height\n",
    "    xmin = xmin*width\n",
    "    ymax = ymax*height\n",
    "    xmax = xmax*width\n",
    "    return np.array([ymin,xmin,ymax,xmax])\n",
    "\n",
    "def pixel_boxes(row):\n",
    "    return np.array([pixel_coordinates(box, row['image_path']) for box in row['detection_boxes']])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for i, row in output_dfs['CastleriggStoneCircle'][:5].iterrows():\n",
    "    # Decode inside a context manager so the image file is closed straight away.\n",
    "    # The array based representation of the image is used below to prepare the\n",
    "    # result image with boxes and labels on it.\n",
    "    with Image.open(row['image_path']) as image:\n",
    "        image_np = load_image_into_numpy_array(image)\n",
    "\n",
    "    # Print scores\n",
    "    print_scores(row, category_index)\n",
    "\n",
    "    # Visualization of the results of a detection.\n",
    "    # The boxes are converted from normalised to pixel coordinates first, hence\n",
    "    # use_normalized_coordinates=False. (The alternative previously kept here as\n",
    "    # commented-out code passes row['detection_boxes'] unchanged together with\n",
    "    # use_normalized_coordinates=True.)\n",
    "    vis_util.visualize_boxes_and_labels_on_image_array(\n",
    "        image_np,\n",
    "        pixel_boxes(row),\n",
    "        row['detection_classes'],\n",
    "        row['detection_scores'],\n",
    "        category_index,\n",
    "        instance_masks=row.get('detection_masks'),\n",
    "        use_normalized_coordinates=False,\n",
    "        line_thickness=8)\n",
    "\n",
    "    plt.figure(figsize=IMAGE_SIZE)\n",
    "    plt.axis('off')\n",
    "    plt.imshow(image_np)\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Object counts"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "_Note_: ``object_labels``, ``object_names``, ``num_objects`` exclude detections whose ``detection_scores`` do not exceed the 0.5 threshold."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def objectArea(box):\n",
    "    ymin, xmin, ymax, xmax = box\n",
    "    width = xmax-xmin\n",
    "    height = ymax-ymin\n",
    "    return width*height"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for site in SITE_NAMES:\n",
    "    output_dfs[site]['object_labels'] = output_dfs[site].apply(lambda row:\n",
    "        [c for i, c in enumerate(row['detection_classes']) if row['detection_scores'][i]>0.5], axis=1)\n",
    "    output_dfs[site]['object_names'] = output_dfs[site].apply(lambda row:\n",
    "        [category_index[i]['name'] for i in row['object_labels']], axis=1)\n",
    "    output_dfs[site]['num_objects'] = output_dfs[site].apply(lambda row:\n",
    "        len(row['object_labels']), axis=1)\n",
    "    output_dfs[site]['object_boxes'] = output_dfs[site].apply(lambda row:\n",
    "        [box for i,box in enumerate(row['detection_boxes']) if row['detection_scores'][i]>0.5], axis=1)\n",
    "    output_dfs[site]['object_areas'] = output_dfs[site].apply(lambda row:\n",
    "        [objectArea(box) for box in row['object_boxes']], axis=1)\n",
    "output_dfs['CastleriggStoneCircle'].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('df_tl.pickle','rb') as f:\n",
    "    df_tl = pickle.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_tl.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Count of object labels across images:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "object_counts = {}\n",
    "for site in SITE_NAMES:\n",
    "    # Count names straight from the per-image lists: Series.sum() would concatenate\n",
    "    # them into one ever-growing list (quadratic) and returns 0 for an empty Series,\n",
    "    # which Counter cannot iterate.\n",
    "    name_counts = Counter(name for names in output_dfs[site]['object_names'] for name in names)\n",
    "    object_counts[site] = pd.DataFrame.from_dict(name_counts,\n",
    "                           orient='index', columns=['count']).sort_values(by=['count'],ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for site in SITE_NAMES:\n",
    "    print(\"{}\\n\\tN images:\\t\\t{}\\n\\tN objects:\\t\\t{}\\n\\tMean objects/image:\\t{:.3f}\\\n",
    "        \\n\\tN images w/out objects:\\t{}\\n\\tN unique objects:\\t{}\\n\\tN unique, count>=10:\\t{}\".format(\n",
    "        site,len(output_dfs[site]),object_counts[site]['count'].sum(),output_dfs[site]['num_objects'].mean(),\n",
    "        len(output_dfs[site][output_dfs[site]['num_objects']==0]),len(object_counts[site]),\n",
    "        len(object_counts[site][object_counts[site]['count']>=10])))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for site in SITE_NAMES:\n",
    "    ax = object_counts[site].iloc[:30].plot(kind='bar',rot=90,legend=False,align='center',width=0.5,figsize=(6,4))\n",
    "    ax.set(ylabel='Count')\n",
    "    ax.set(title='Objects in {} images of {}'.format(len(output_dfs[site]),site))\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def hasO(row,o):\n",
    "    if o in row:\n",
    "        return True\n",
    "    else:\n",
    "        return False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test = output_dfs['RuffordAbbey'].copy()\n",
    "test['bluejay'] = test.apply(lambda row: hasO(row['object_names'],'Blue jay'),axis=1)\n",
    "test['rose'] = test.apply(lambda row: hasO(row['object_names'],'Rose'),axis=1)\n",
    "test['bronze'] = test.apply(lambda row: hasO(row['object_names'],'Bronze sculpture'),axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for i in list(test[test['bronze']==True]['image_path']):\n",
    "    img = mpimg.imread(i)\n",
    "    plt.imshow(img)\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Retrieve key objects of interest"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "SITE_NAMES = SITE_NAMES[:-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "OBJECTS = ['Human face',\n",
    "           'Person','Woman','Man','Boy','Girl',\n",
    "           'Dog','Sculpture','Bronze sculpture',\n",
    "           'Bicycle','Tree',\n",
    "           'Flower','Rose',\n",
    "           'Building','Castle','Tower','House',\n",
    "           'Bird','Duck','Goose','Swan','Sparrow','Owl','Canary','Raven','Blue jay','Falcon','Eagle']\n",
    "object_dfs = {}\n",
    "for site in SITE_NAMES:\n",
    "    df = output_dfs[site][['image_path','object_names','object_boxes','object_areas']].copy()\n",
    "    df['key_objects'] = df.apply(lambda row: [i for i in row['object_names'] if i in OBJECTS],axis=1)\n",
    "    df['key_areas'] = df.apply(lambda row: [i for c,i in enumerate(row['object_areas'])\n",
    "                                            if row['object_names'][c] in OBJECTS],axis=1)\n",
    "    df['key_boxes'] = df.apply(lambda row: [i for c,i in enumerate(row['object_boxes'])\n",
    "                                            if row['object_names'][c] in OBJECTS],axis=1)\n",
    "    object_dfs[site] = df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "key_object_counts = {}\n",
    "for site in SITE_NAMES:\n",
    "    # Count straight from the per-image lists; Series.sum() is quadratic on lists\n",
    "    # and returns 0 (not a list) for an empty Series.\n",
    "    key_counts = Counter(name for names in object_dfs[site]['key_objects'] for name in names)\n",
    "    key_object_counts[site] = pd.DataFrame.from_dict(key_counts,\n",
    "                           orient='index', columns=['count']).sort_values(by=['count'],ascending=False)\n",
    "# Preview the last site in SITE_NAMES; a hard-coded site name raises KeyError\n",
    "# whenever that site is not in the list.\n",
    "key_object_counts[SITE_NAMES[-1]].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for site in SITE_NAMES:\n",
    "    ax = key_object_counts[site].plot(kind='bar',rot=90,legend=False,align='center',width=0.5,figsize=(6,4))\n",
    "    ax.set(ylabel='Count')\n",
    "    ax.set(title='Key objects in {} images of {}'.format(len(output_dfs[site]),site))\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Get selfies"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Plot distribution of ``Human face`` areas in dataset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def categoryArea(object_list,area_list,category):\n",
    "    \"\"\"\n",
    "    Input: category_list(list)-list of object categories\n",
    "           area_list(list)-list of corresponding areas\n",
    "    Returns: areas(list)-areas of detections of category\n",
    "    \"\"\"\n",
    "    areas = []\n",
    "    for i,cat in enumerate(object_list):\n",
    "        if cat==category:\n",
    "            areas.append(area_list[i])\n",
    "    return areas\n",
    "\n",
    "def dfAreas(df,category):\n",
    "    \"\"\"\n",
    "    Input: df(DataFrame)-object_dfs[site], with columns image_path, key_objects, key_areas\n",
    "           category(str)-object category\n",
    "    Returns: new df with columns image_path, {category}_areas(list)\n",
    "    \"\"\"\n",
    "    area_col = '{}_areas'.format(category)\n",
    "    subset = df[['image_path','key_objects','key_areas']].copy()\n",
    "\n",
    "    def areas_for(row):\n",
    "        # Areas of this row's detections that belong to the requested category.\n",
    "        return categoryArea(row['key_objects'],row['key_areas'],category)\n",
    "\n",
    "    subset[area_col] = subset.apply(areas_for,axis=1)\n",
    "    return subset[['image_path',area_col]].copy()\n",
    "\n",
    "def pathstoArea(paths_col,area_col,threshold,geq=True):\n",
    "    \"\"\"\n",
    "    Input: paths_col, area_col-columns from dfAreas[site]\n",
    "    Returns: image paths with area>=threshold\n",
    "    \"\"\"\n",
    "    paths = []\n",
    "    for i, area_list in enumerate(list(area_col)):\n",
    "        if geq==True:\n",
    "            past_threshold = [i for i in area_list if i>=threshold]\n",
    "        elif geq==False:\n",
    "            past_threshold = [i for i in area_list if i<threshold]\n",
    "        if len(past_threshold)>0:\n",
    "            paths.append(list(paths_col)[i])\n",
    "    return paths"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "human_face_areas = {} # dictionary with {site: df containing image_path, category_areas(list)}\n",
    "human_face_areas_list = {} # dictionary with {site: list containing all areas of category}\n",
    "for site in SITE_NAMES:\n",
    "    human_face_areas[site] = dfAreas(object_dfs[site],'Human face')\n",
    "    human_face_areas_list[site] = [item for sublist in list(human_face_areas[site]['Human face_areas']) for item in sublist]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for site in SITE_NAMES:\n",
    "    # Read the count as a scalar (int() on a one-element Series is deprecated) and\n",
    "    # treat a site without any 'Human face' detection as 0 instead of raising KeyError.\n",
    "    n_faces = int(key_object_counts[site]['count'].get('Human face', 0))\n",
    "    assert len(human_face_areas_list[site]) == n_faces"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "human_face_areas_all = []\n",
    "for site in SITE_NAMES:\n",
    "    human_face_areas_all += human_face_areas_list[site]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.boxplot(human_face_areas_all)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "face_area_median = np.median(human_face_areas_all)\n",
    "print('Mean: {}'.format(np.mean(human_face_areas_all)))\n",
    "print('Min: {}'.format(min(human_face_areas_all)))\n",
    "print('Median: {}'.format(face_area_median))\n",
    "print('Max: {}'.format(max(human_face_areas_all)))\n",
    "plt.figure(figsize=(8,6))\n",
    "plt.hist(human_face_areas_all,density=True,bins=100,range=(0,0.3))\n",
    "plt.ylabel('Density')\n",
    "plt.xlabel('Area of \"Human face\" (proportion of image area)')\n",
    "ax = plt.gca()\n",
    "for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +\n",
    "             ax.get_xticklabels() + ax.get_yticklabels()):\n",
    "    item.set_fontsize(16)\n",
    "plt.axvline(face_area_median, color='k', linestyle='dashed', linewidth=1)\n",
    "# Label the line with the computed median so the annotation cannot drift from the data.\n",
    "plt.text(face_area_median+0.0025,20,'median$={:.3f}$'.format(face_area_median),size=16)\n",
    "# plt.savefig('4_face_hist.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "human_face_paths = {}\n",
    "for site in SITE_NAMES:\n",
    "    human_face_paths[site] = pathstoArea(human_face_areas[site]['image_path'],human_face_areas[site]['Human face_areas'],np.median(human_face_areas_all),geq=True)\n",
    "    # human_face_paths[site] = pathstoArea(human_face_areas[site]['image_path'],human_face_areas[site]['Human face_areas'],0.05,geq=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for path in human_face_paths['BuryStEdmundsAbbey'][:5]:\n",
    "    img = mpimg.imread(path)\n",
    "    plt.imshow(img)\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
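    "Replace object labels:\n",
    "* ``Boy``, ``Girl``, ``Man``, ``Woman`` $\\rightarrow$ ``Person``;\n",
    "* ``Duck``, ``Goose``, ``Swan`` and the other bird species listed in ``BIRDS`` $\\rightarrow$ ``Bird``;\n",
    "* ``Rose`` $\\rightarrow$ ``Flower`` and ``Bronze sculpture`` $\\rightarrow$ ``Sculpture``; and\n",
    "* ``Human face`` $\\rightarrow$ ``Selfie`` if proportional area $\\geq$ median (smaller faces are dropped)."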
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "PERSONS = ['Woman','Man','Boy','Girl']\n",
    "BIRDS = ['Duck','Goose','Swan','Sparrow','Owl','Canary','Raven','Blue jay','Falcon','Eagle']\n",
    "FLOWERS = ['Rose']\n",
    "\n",
    "def replaceLabels(row):\n",
    "    new_labels = []\n",
    "    for c, label in enumerate(row['key_objects']):\n",
    "        if label in PERSONS:\n",
    "            new_labels.append('Person')\n",
    "        elif label in BIRDS:\n",
    "            new_labels.append('Bird')\n",
    "        elif label=='Rose':\n",
    "            new_labels.append('Flower')\n",
    "        elif label=='Bronze sculpture':\n",
    "            new_labels.append('Sculpture')\n",
    "        elif label=='Human face':\n",
    "            if row['key_areas'][c] >= np.median(human_face_areas_all):\n",
    "                new_labels.append('Selfie')\n",
    "        else:\n",
    "            new_labels.append(label)\n",
    "    return new_labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Same cut-off that replaceLabels applies to 'Human face' detections.\n",
    "face_median = np.median(human_face_areas_all)\n",
    "for site in SITE_NAMES:\n",
    "    object_dfs[site]['key_objects_v2'] = object_dfs[site].apply(lambda row:\n",
    "                                            replaceLabels(row),axis=1)\n",
    "    # replaceLabels keeps a 'Human face' (as 'Selfie') only when its area is >= the\n",
    "    # median and drops the smaller ones, so exactly the same detections must be kept\n",
    "    # here for key_areas_v2 to stay aligned with key_objects_v2.\n",
    "    object_dfs[site]['key_areas_v2'] = object_dfs[site].apply(lambda row:\n",
    "        [area for label,area in zip(row['key_objects'],row['key_areas'])\n",
    "         if label!='Human face' or area>=face_median], axis=1)\n",
    "    object_dfs[site]['num_objects'] = object_dfs[site].apply(lambda row: len(row['key_objects_v2']),axis=1)\n",
    "    object_dfs[site]['unique_objects'] = object_dfs[site].apply(lambda row: list(set(row['key_objects_v2'])),axis=1)\n",
    "    object_dfs[site]['num_unique_objects'] = object_dfs[site].apply(lambda row: len(row['unique_objects']),axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_ots = object_dfs['RollrightStones'][['image_path','key_objects_v2']].copy()\n",
    "df_ots['filename'] = df_ots.apply(lambda row: os.path.basename(row['image_path']),axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_both = df_ots.merge(df_tl,on='filename',how='inner')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(df_ots) == len(df_tl) == len(df_both)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_both['key_objects_v2'] = df_both['key_objects_v2'] + df_both['object_names']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def asteriskStone(string):\n",
    "    if string=='Stone':\n",
    "        return '{}*'.format(string)\n",
    "    else:\n",
    "        return string"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_both['key_objects_v2'] = df_both.apply(lambda row: [asteriskStone(i) for i in row['key_objects_v2']],axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_both = df_both[['image_path','key_objects_v2']].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_both['num_objects'] = df_both.apply(lambda row: len(row['key_objects_v2']),axis=1)\n",
    "df_both['unique_objects'] = df_both.apply(lambda row: list(set(row['key_objects_v2'])),axis=1)\n",
    "df_both['num_unique_objects'] = df_both.apply(lambda row: len(row['unique_objects']),axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "object_dfs['RollrightStones*'] = df_both.copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "SITE_NAMES = ['CastleriggStoneCircle','RuffordAbbey','ReculverTowersandRomanFort','BuryStEdmundsAbbey','RollrightStones','RollrightStones*']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "key_object_counts_v2 = {}\n",
    "for site in SITE_NAMES:\n",
    "    # Count straight from the per-image lists; Series.sum() is quadratic on lists\n",
    "    # and returns 0 (not a list) for an empty Series.\n",
    "    label_counts = Counter(label for labels in object_dfs[site]['key_objects_v2'] for label in labels)\n",
    "    key_object_counts_v2[site] = pd.DataFrame.from_dict(label_counts,\n",
    "                           orient='index', columns=['count']).sort_values(by=['count'],ascending=False)\n",
    "key_object_counts_v2['CastleriggStoneCircle'].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "key_object_counts_v2['RollrightStones*']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for site in SITE_NAMES:\n",
    "    ax = key_object_counts_v2[site].plot(kind='bar',rot=90,legend=False,align='center',width=0.5,figsize=(6,4))\n",
    "    ax.set(title='Key objects in {} images of {}'.format(len(object_dfs[site]),site))\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for site in SITE_NAMES:\n",
    "    print(\"{}\\n\\tN images:\\t\\t{}\\n\\tN objects:\\t\\t{}\\n\\tMean objects/image:\\t{:.3f}\".format(\n",
    "        site,len(object_dfs[site]),key_object_counts_v2[site]['count'].sum(),\n",
    "        sum(key_object_counts_v2[site]['count'])/len(object_dfs[site])))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Get proportion of images with given object."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def getLabel(label, lis):\n",
    "    if label in lis:\n",
    "        return 1\n",
    "    else:\n",
    "        return 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "OBJECTS_V2 = ['Selfie','Person','Dog','Sculpture','Bicycle','Tree','Flower','Building','Castle','Tower','House','Bird','Stone*']\n",
    "imageswith_dfs = {}\n",
    "for site in SITE_NAMES:\n",
    "    imageswith_dfs[site] = object_dfs[site][['image_path','key_objects_v2','num_unique_objects']].copy()\n",
    "    for obj in OBJECTS_V2:\n",
    "        imageswith_dfs[site][obj] = imageswith_dfs[site].apply(lambda row: getLabel(obj,row['key_objects_v2']), axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "proportions = {}\n",
    "for site in SITE_NAMES:\n",
    "    proportions[site] = pd.DataFrame(OBJECTS_V2,columns=['Object'])\n",
    "    proportions[site]['image_count'] = 0\n",
    "    for obj in OBJECTS_V2:\n",
    "        proportions[site].loc[proportions[site]['Object']==obj,'image_count'] = imageswith_dfs[site][obj].sum()\n",
    "    proportions[site]['image_percent'] = 100*proportions[site]['image_count']/len(imageswith_dfs[site])\n",
    "    proportions[site] = proportions[site].set_index('Object').sort_values(by=['image_count'],ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "proportions['CastleriggStoneCircle'].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# with open('imageswith_dfs.pickle', 'wb') as f:\n",
    "#     pickle.dump(imageswith_dfs, f, pickle.HIGHEST_PROTOCOL)\n",
    "with open('imageswith_dfs.pickle', 'rb') as f:\n",
    "    imageswith_dfs = pickle.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for site in SITE_NAMES:\n",
    "    ax = proportions[site]['image_percent'].plot(kind='bar',rot=90,legend=False,align='center',width=0.5,figsize=(6,4))\n",
    "    ax.set(ylabel='Percentage of images')\n",
    "    ax.xaxis.grid(False)\n",
    "    ax.set(title='{}'.format(site))\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Stacked bar chart with sites as bars and objects stacked, as sites' \"barcodes\"."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "stacked = []\n",
    "for site in SITE_NAMES:\n",
    "    stacked.append(proportions[site]['image_percent'].copy().rename(site))\n",
    "stacked = pd.concat(stacked,axis=1,keys=[s.name for s in stacked],sort=True).T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# cols = list(stacked.sum().sort_values(ascending=False).index) # order objects from most to least number of images across sites\n",
    "cols = ['Person','Selfie','Tree','Castle','Building','House','Tower','Sculpture','Flower','Bird','Dog','Bicycle','Stone*']\n",
    "stacked = stacked[cols]\n",
    "display(stacked)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Colour list for the stacked bar chart: the first 12 colours of the 'Paired'\n",
    "# colormap followed by one opaque mid-grey.\n",
    "# plt.get_cmap is used because matplotlib.cm.get_cmap was deprecated in\n",
    "# matplotlib 3.7 and removed in 3.9.\n",
    "cmap = plt.get_cmap('Paired')\n",
    "rgba = [cmap(i) for i in range(12)] + [(0.5,0.5,0.5,1)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "stacked.plot.barh(stacked=True,color=rgba,figsize=(6,4)).legend(bbox_to_anchor=(1,1.035),prop={'size':12})\n",
    "ax = plt.gca()\n",
    "ax.invert_yaxis()\n",
    "ax.xaxis.grid(True)\n",
    "plt.xlabel('Percentage of site\\'s images containing object')\n",
    "ax.set_axisbelow(True)\n",
    "for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +\n",
    "             ax.get_xticklabels() + ax.get_yticklabels()):\n",
    "    item.set_fontsize(12)\n",
    "# Note: Percentages for each site don't sum to 100% because objects can occur in any number of images\n",
    "plt.savefig('5_barcodes.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "stacked2[:-1][cols2[:-1]].T.plot.barh(width=0.9,figsize=(8,6),rot=0,colormap='Accent').legend(bbox_to_anchor=(0.38,0.63),prop={'size':14})\n",
    "ax = plt.gca()\n",
    "ax.set_axisbelow(True)\n",
    "for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +\n",
    "             ax.get_xticklabels() + ax.get_yticklabels()):\n",
    "    item.set_fontsize(14)\n",
    "ax.xaxis.grid(True)\n",
    "ax.set_axisbelow(True)\n",
    "ax.invert_yaxis()\n",
    "plt.xlabel('Percentage of site\\'s images containing object')\n",
    "plt.savefig('5_objects.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(imageswith_dfs[SITE_NAMES[0]][imageswith_dfs[SITE_NAMES[0]]['num_unique_objects']==0]['image_path'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "temp_paths = list(imageswith_dfs[SITE_NAMES[1]][imageswith_dfs[SITE_NAMES[1]]['num_unique_objects']==0]['image_path'])\n",
    "plt.figure(figsize=(12,14))\n",
    "for n, image_path in enumerate([temp_paths[i] for i in [0,1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]]): # of 428 possible\n",
    "    plt.subplot(5,4,n+1)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "plt.subplots_adjust(wspace=0.0, hspace=0.05)\n",
    "plt.savefig('5_rufford_none.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure(figsize=(12,14))\n",
    "for n, image_path in enumerate(imageswith_dfs[SITE_NAMES[0]][imageswith_dfs[SITE_NAMES[0]]['num_unique_objects']==0]['image_path'][:20]): # of 1145 possible\n",
    "    plt.subplot(5,4,n+1)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "plt.subplots_adjust(wspace=0.0, hspace=0.05)\n",
    "plt.savefig('5_castlerigg_none.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(imageswith_dfs[SITE_NAMES[2]][imageswith_dfs[SITE_NAMES[2]]['num_unique_objects']==0]['image_path'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure(figsize=(12,14))\n",
    "for n, image_path in enumerate(imageswith_dfs[SITE_NAMES[2]][imageswith_dfs[SITE_NAMES[2]]['num_unique_objects']==0]['image_path'][:20]): # of 1035 possible\n",
    "    plt.subplot(5,4,n+1)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "plt.subplots_adjust(wspace=0.0, hspace=0.05)\n",
    "plt.savefig('5_reculver_none.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(imageswith_dfs[SITE_NAMES[3]][imageswith_dfs[SITE_NAMES[3]]['num_unique_objects']==0]['image_path'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure(figsize=(12,14))\n",
    "for n, image_path in enumerate(imageswith_dfs[SITE_NAMES[3]][imageswith_dfs[SITE_NAMES[3]]['num_unique_objects']==0]['image_path'][10:30]): # of 412 possible\n",
    "    plt.subplot(5,4,n+1)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "plt.subplots_adjust(wspace=0.0, hspace=0.05)\n",
    "plt.savefig('5_bury_none.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(imageswith_dfs[SITE_NAMES[4]][imageswith_dfs[SITE_NAMES[4]]['num_unique_objects']==0]['image_path'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure(figsize=(12,14))\n",
    "for n, image_path in enumerate(imageswith_dfs[SITE_NAMES[4]][imageswith_dfs[SITE_NAMES[4]]['num_unique_objects']==0]['image_path'][10:30]): # of 792 possible\n",
    "    plt.subplot(5,4,n+1)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "plt.subplots_adjust(wspace=0.0, hspace=0.05)\n",
    "plt.savefig('5_rollright_none.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(df_both[df_both['num_unique_objects']==0]['image_path'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure(figsize=(12,14))\n",
    "for n, image_path in enumerate(df_both[df_both['num_unique_objects']==0]['image_path'][5:25]): # of 230 possible\n",
    "    plt.subplot(5,4,n+1)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "plt.subplots_adjust(wspace=0.0, hspace=0.05)\n",
    "plt.savefig('5_rollright2_none.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Co-occurrence matrix of objects"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "[Construct co-occurrence matrix](https://stackoverflow.com/questions/42814452/co-occurrence-matrix-from-list-of-words-in-python) of key objects, with entries as number of co-occurrences between objects."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build one co-occurrence count matrix per site: entry (a, b) counts the images\n",
    "# whose unique labels contain both a and b.\n",
    "matrices = {}\n",
    "for site in SITE_NAMES:\n",
    "    site_objects = object_dfs[site]\n",
    "    site_images = imageswith_dfs[site]\n",
    "    names_per_image = list(site_objects.loc[\n",
    "        site_objects['num_unique_objects']>0,'unique_objects']) # exclude images without objects\n",
    "    unique_names = sorted(\n",
    "        name for name in {i for l in names_per_image for i in l}\n",
    "        if proportions[site].loc[name,'image_percent']>=1) # keep objects occurring in at least 1% of images\n",
    "    kept_names = set(unique_names) # set gives constant-time membership tests below\n",
    "    occurrences = OrderedDict((name, OrderedDict((other, 0) for other in unique_names))\n",
    "                              for name in unique_names)\n",
    "    # Find the co-occurrences: every ordered pair of distinct kept labels in an image\n",
    "    for labels in names_per_image:\n",
    "        labels = [name for name in labels if name in kept_names]\n",
    "        for i, name in enumerate(labels):\n",
    "            for other in labels[:i] + labels[i + 1:]:\n",
    "                occurrences[name][other] += 1\n",
    "    matrices[site] = pd.DataFrame.from_dict(occurrences)\n",
    "    # Set diagonal entries to number of images where object occurs alone.\n",
    "    # .loc[row, col] replaces the chained matrices[site][obj][obj] assignment,\n",
    "    # which pandas does not guarantee to write through to the DataFrame.\n",
    "    for obj in unique_names:\n",
    "        matrices[site].loc[obj,obj] = len(site_images.loc[\n",
    "            (site_images['num_unique_objects']==1) & (site_images[obj]==1)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "matrices['RollrightStones*']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Normalise entries (i.e. edge weights) by number of images in which either object occurs."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def imagesWithBoth(df,cat1,cat2):\n",
    "    \"\"\"\n",
    "    Count images containing both categories.\n",
    "    Input: df(DataFrame)-imageswith_dfs[site]\n",
    "           cat[1/2](str)-object category (column name in df)\n",
    "    Returns: sum of the element-wise AND of columns cat1 and cat2\n",
    "    \"\"\"\n",
    "    both = df[cat1] & df[cat2]\n",
    "    return both.sum()\n",
    "\n",
    "def imagesWithEither(df,cat1,cat2):\n",
    "    \"\"\"\n",
    "    Count images containing at least one of the two categories.\n",
    "    Input: df(DataFrame)-imageswith_dfs[site]\n",
    "           cat[1/2](str)-object category (column name in df)\n",
    "    Returns: sum of the element-wise OR of columns cat1 and cat2\n",
    "    \"\"\"\n",
    "    either = df[cat1] | df[cat2]\n",
    "    return either.sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Normalise each co-occurrence count by the number of images in which either\n",
    "# object occurs, expressed as a percentage.\n",
    "matrices_norm = matrices.copy()\n",
    "for site in SITE_NAMES:\n",
    "    counts = matrices[site]\n",
    "    mat_denom = pd.DataFrame(index=counts.index, columns=counts.index)\n",
    "    for rowname in counts.index:\n",
    "        for col in counts.columns:\n",
    "            # .loc[row, col] replaces the chained mat_denom[rowname][col] assignment,\n",
    "            # which pandas does not guarantee to write through to the DataFrame.\n",
    "            mat_denom.loc[col,rowname] = imagesWithEither(imageswith_dfs[site],rowname,col)\n",
    "    matrices_norm[site] = (100*counts / mat_denom).astype(np.float64)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "matrices_norm['CastleriggStoneCircle'] # of all images containing either obj1 or obj2, what is the probability that obj1 and obj2 co-occur"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "imagesWithEither(imageswith_dfs['RuffordAbbey'],'Bird','Bird')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Turn each normalised co-occurrence matrix into a weighted graph, write it to\n",
    "# a GML file and record per-object image counts (used as node sizes below).\n",
    "Gs = {}\n",
    "sizes = {}\n",
    "for site in SITE_NAMES:\n",
    "    object_list = list(matrices_norm[site].index)\n",
    "    # nx.from_numpy_matrix was removed in NetworkX 3.0 and np.matrix is discouraged;\n",
    "    # from_numpy_array on the plain ndarray builds the same graph.\n",
    "    G = nx.from_numpy_array(matrices_norm[site].values, create_using=nx.MultiGraph())\n",
    "    # Nodes are created as 0..n-1; relabel them with the object names\n",
    "    G = nx.relabel_nodes(G,dict(enumerate(object_list)))\n",
    "    # The 'RollrightStones*' site is written out under the name 'RollrightStones2'\n",
    "    gml_name = 'RollrightStones2' if site == 'RollrightStones*' else site\n",
    "    nx.write_gml(G, '{}.gml'.format(gml_name))\n",
    "    Gs[site] = G\n",
    "    sizes[site] = dict(zip(object_list, [proportions[site]['image_count'].loc[i] for i in object_list]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for site in SITE_NAMES:\n",
    "    print(site)\n",
    "    print(sizes[site])\n",
    "    nx.draw(Gs[site],nx.spring_layout(Gs[site]),node_size=list(sizes[site].values()),with_labels=True)\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Co-occurrence networks with structural objects combined\n",
    "Combine:\n",
    "* ``Structure``: ``Building``, ``Castle``, ``House``, ``Tower``"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def combineLabels(row):\n",
    "    \"\"\"\n",
    "    Input: row-mapping whose 'key_objects_v2' entry is a list of labels\n",
    "    Returns: new list, same length and order, where 'Building', 'Castle',\n",
    "             'House' and 'Tower' are each replaced by 'Structure';\n",
    "             all other labels are kept unchanged\n",
    "    \"\"\"\n",
    "    structural = ('Building','Castle','House','Tower')\n",
    "    return ['Structure' if label in structural else label\n",
    "            for label in row['key_objects_v2']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "object_combined = {}\n",
    "for site in SITE_NAMES:\n",
    "    object_combined[site] = object_dfs[site][['image_path','key_objects_v2']].copy()\n",
    "    object_combined[site]['key_objects_v3'] = object_combined[site].apply(lambda row:\n",
    "                                            combineLabels(row),axis=1)\n",
    "    object_combined[site]['num_objects'] = object_combined[site].apply(lambda row: len(row['key_objects_v3']),axis=1)\n",
    "    object_combined[site]['unique_objects'] = object_combined[site].apply(lambda row: list(set(row['key_objects_v3'])),axis=1)\n",
    "    object_combined[site]['num_unique_objects'] = object_combined[site].apply(lambda row: len(row['unique_objects']),axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "OBJECTS_V3 = ['Person','Selfie','Dog','Sculpture','Bicycle','Tree','Flower','Structure','Bird','Stone*']\n",
    "imageswith_combined = {}\n",
    "for site in SITE_NAMES:\n",
    "    imageswith_combined[site] = object_combined[site][['image_path','key_objects_v3','num_unique_objects']].copy()\n",
    "    for obj in OBJECTS_V3:\n",
    "        imageswith_combined[site][obj] = imageswith_combined[site].apply(lambda row: getLabel(obj,row['key_objects_v3']), axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "proportions_combined = {}\n",
    "for site in SITE_NAMES:\n",
    "    proportions_combined[site] = pd.DataFrame(OBJECTS_V3,columns=['Object'])\n",
    "    proportions_combined[site]['image_count'] = 0\n",
    "    for obj in OBJECTS_V3:\n",
    "        proportions_combined[site].loc[proportions_combined[site]['Object']==obj,'image_count'] = imageswith_combined[site][obj].sum()\n",
    "    proportions_combined[site]['image_percent'] = 100*proportions_combined[site]['image_count']/len(imageswith_combined[site])\n",
    "    proportions_combined[site] = proportions_combined[site].set_index('Object').sort_values(by=['image_count'],ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "matrices_combined = {}\n",
    "for site in SITE_NAMES:\n",
    "    names_per_image = list(object_combined[site].loc[\n",
    "        object_combined[site]['num_unique_objects']>0]['unique_objects']) # exclude images without objects\n",
    "    unique_names = list(set([i for l in names_per_image for i in l]))\n",
    "    unique_names = [i for i in unique_names if proportions_combined[site].loc[\n",
    "        i,'image_percent']>=1] # keep objects occurring in at least 1% of images\n",
    "    unique_names.sort()\n",
    "    occurrences = OrderedDict((name, OrderedDict((name, 0) for name in unique_names))\n",
    "                              for name in unique_names)\n",
    "    # Find the co-occurrences\n",
    "    for l in names_per_image:\n",
    "        l = [name for name in l if name in unique_names]\n",
    "        for i in range(len(l)):\n",
    "            for item in l[:i] + l[i + 1:]:\n",
    "                occurrences[l[i]][item] += 1\n",
    "    matrices_combined[site] = pd.DataFrame.from_dict(occurrences)\n",
    "    # Set diagonal entries to number of images where object occurs alone\n",
    "#     for obj in unique_names:\n",
    "#         matrices_combined[site][obj][obj] = len(imageswith_combined[site].loc[\n",
    "#             (imageswith_combined[site]['num_unique_objects']==1) & (imageswith_combined[site][obj]==1)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Normalise each combined co-occurrence count by the number of images in which\n",
    "# either object occurs, expressed as a percentage.\n",
    "matrices_combined_norm = matrices_combined.copy()\n",
    "for site in SITE_NAMES:\n",
    "    counts = matrices_combined[site]\n",
    "    mat_denom = pd.DataFrame(index=counts.index, columns=counts.index)\n",
    "    for rowname in counts.index:\n",
    "        for col in counts.columns:\n",
    "            # .loc[row, col] replaces the chained mat_denom[rowname][col] assignment,\n",
    "            # which pandas does not guarantee to write through to the DataFrame.\n",
    "            mat_denom.loc[col,rowname] = imagesWithEither(imageswith_combined[site],rowname,col)\n",
    "    matrices_combined_norm[site] = (100*counts / mat_denom).astype(np.float64)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for site in SITE_NAMES:\n",
    "    print(site)\n",
    "    display(matrices_combined[site])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Person vs Structure overlap for the sites in SITE_NAMES[1:4].\n",
    "# The printed labels now say 'structure': the values are computed from the\n",
    "# 'Structure' column, not 'Sculpture' as the old labels claimed.\n",
    "for site in SITE_NAMES[1:4]:\n",
    "    n_either = imagesWithEither(imageswith_combined[site],'Person','Structure')\n",
    "    n_both = matrices_combined[site].loc['Person']['Structure']\n",
    "    print(site)\n",
    "    print('\\tN images (person): {}'.format(sum(imageswith_combined[site]['Person'])))\n",
    "    print('\\tN images (structure): {}'.format(sum(imageswith_combined[site]['Structure'])))\n",
    "    print('\\tN images (person OR structure): {}'.format(n_either))\n",
    "    print('\\tN images (person AND structure): {}'.format(n_both))\n",
    "    # Intersection over union, as a percentage\n",
    "    print('\\tIOU(person, structure): {}'.format((100*n_both)/n_either))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Percentage of images with / without people, per site.\n",
    "# NOTE(review): 3979 is used as the image total for every site - confirm each\n",
    "# site really has this many images, otherwise use len(imageswith_combined[site]).\n",
    "TOTAL_IMAGES = 3979\n",
    "for site in SITE_NAMES:\n",
    "    n_person = sum(imageswith_combined[site]['Person'])\n",
    "    n_person_or_selfie = imagesWithEither(imageswith_combined[site],'Person','Selfie')\n",
    "    print(site)\n",
    "    # Labels say '%' because the printed values are percentages, not counts\n",
    "    print('\\t% images person: {}'.format(100*n_person/TOTAL_IMAGES))\n",
    "    print('\\t% images (person OR selfie): {}'.format(100*n_person_or_selfie/TOTAL_IMAGES))\n",
    "    print('\\t% images ~person: {}'.format(100*(TOTAL_IMAGES-n_person)/TOTAL_IMAGES))\n",
    "    print('\\t% images ~(person OR selfie): {}'.format(100*(TOTAL_IMAGES-n_person_or_selfie)/TOTAL_IMAGES))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "vennsets = {}\n",
    "for site in SITE_NAMES[1:4]:\n",
    "    image_paths_person = set(imageswith_combined[site][imageswith_combined[site]['Person']==1]['image_path'])\n",
    "    image_paths_structure = set(imageswith_combined[site][imageswith_combined[site]['Structure']==1]['image_path'])\n",
    "    vennsets[site] = [image_paths_person, image_paths_structure]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "image_paths_person = set(imageswith_combined['RollrightStones*'][imageswith_combined['RollrightStones*']['Person']==1]['image_path'])\n",
    "image_paths_sculpture = set(imageswith_combined['RollrightStones*'][imageswith_combined['RollrightStones*']['Sculpture']==1]['image_path'])\n",
    "image_paths_stone = set(imageswith_combined['RollrightStones*'][imageswith_combined['RollrightStones*']['Stone*']==1]['image_path'])\n",
    "venn_rollright = [image_paths_person,image_paths_sculpture,image_paths_stone]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Side-by-side Person/Structure Venn diagrams for three sites.\n",
    "# One loop replaces three copy-pasted styling blocks; all sites share the same style.\n",
    "venn_sites = ['RuffordAbbey','ReculverTowersandRomanFort','BuryStEdmundsAbbey']\n",
    "# (region id, colour, alpha): '10' = Person only, '01' = Structure only, '11' = both\n",
    "region_styles = [('10','orange',0.5), ('01','blue',0.25), ('11','brown',0.5)]\n",
    "figure, axes = plt.subplots(1,3,figsize=(14,8))\n",
    "venn_diagrams = []\n",
    "for ax, site in zip(axes, venn_sites):\n",
    "    v = venn2(vennsets[site],('Person','Structure'),ax=ax)\n",
    "    for region, colour, alpha in region_styles:\n",
    "        patch = v.get_patch_by_id(region)\n",
    "        patch.set_color(colour)\n",
    "        patch.set_edgecolor('none')\n",
    "        patch.set_alpha(alpha)\n",
    "    for text in v.set_labels:\n",
    "        text.set_fontsize(18)\n",
    "    for text in v.subset_labels:\n",
    "        text.set_fontsize(18)\n",
    "    ax.set_title(site,fontsize=18)\n",
    "    venn_diagrams.append(v)\n",
    "v1, v2, v3 = venn_diagrams # keep the original per-site names available\n",
    "plt.subplots_adjust(wspace=0.2)\n",
    "plt.savefig('5_venn.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(100*44/(1451+44+526))\n",
    "print(100*171/(763+1587+171))\n",
    "print(100*106/(1368+106+974))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "total = len(venn_rollright[0].union(venn_rollright[1]).union(venn_rollright[2]))\n",
    "figure, axes = plt.subplots(1,1,figsize=(8,8))\n",
    "# v4 = venn3(venn_rollright,('Person','Sculpture','Stone'),subset_label_formatter=lambda x: '{:.1f}%'.format(100*x/total))\n",
    "v4 = venn3(venn_rollright,('Person','Sculpture','Stone'))\n",
    "v4.get_patch_by_id('100').set_alpha(0.3)\n",
    "v4.get_patch_by_id('010').set_alpha(0.3)\n",
    "v4.get_patch_by_id('001').set_alpha(0.3)\n",
    "for text in v4.set_labels:\n",
    "    text.set_fontsize(26)\n",
    "for text in v4.subset_labels:\n",
    "    text.set_fontsize(26)\n",
    "# plt.savefig('5_venn_rollright.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Turn each normalised combined co-occurrence matrix into a weighted graph,\n",
    "# write it to a '<site>_v3.gml' file and record per-object image counts.\n",
    "Gs_combined = {}\n",
    "sizes_combined = {}\n",
    "for site in SITE_NAMES:\n",
    "    object_list = list(matrices_combined_norm[site].index)\n",
    "    # nx.from_numpy_matrix was removed in NetworkX 3.0 and np.matrix is discouraged;\n",
    "    # from_numpy_array on the plain ndarray builds the same graph.\n",
    "    G = nx.from_numpy_array(matrices_combined_norm[site].values, create_using=nx.MultiGraph())\n",
    "    # Nodes are created as 0..n-1; relabel them with the object names\n",
    "    G = nx.relabel_nodes(G,dict(enumerate(object_list)))\n",
    "    # The 'RollrightStones*' site is written out under the name 'RollrightStones2'\n",
    "    gml_name = 'RollrightStones2' if site == 'RollrightStones*' else site\n",
    "    nx.write_gml(G, '{}_v3.gml'.format(gml_name))\n",
    "    Gs_combined[site] = G\n",
    "    sizes_combined[site] = dict(zip(object_list, [proportions_combined[site]['image_count'].loc[i] for i in object_list]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Plot images with co-occurring objects"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def getImages(df,obj1,obj2,obj0=0):\n",
    "    \"\"\"\n",
    "    Input: df(DataFrame)-has an 'image_path' column and one column per object\n",
    "           obj[1/2](str)-object columns that must both equal 1\n",
    "           obj0(str, optional)-object column that must equal 0; any non-str\n",
    "               value (the default 0) disables this extra filter\n",
    "    Returns: list of image paths to images with obj1 & obj2 present,\n",
    "        and obj0 absent (optional)\n",
    "    \"\"\"\n",
    "    mask = (df[obj1]==1) & (df[obj2]==1)\n",
    "    if isinstance(obj0,str):\n",
    "        mask = mask & (df[obj0]==0)\n",
    "    return list(df.loc[mask]['image_path'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(getImages(imageswith_dfs['RuffordAbbey'],'Person','Sculpture'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for n, image_path in enumerate(getImages(imageswith_dfs['RuffordAbbey'],'Person','Sculpture')):\n",
    "    print(n)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "    plt.show()\n",
    "# Derived from the above 35 images:\n",
    "i_love = [21,0,7,4,8,10]\n",
    "ic_love = [16,22,23,24]\n",
    "i_arch = [31,25,17]\n",
    "ic_arch = [6,18,27,28]\n",
    "i_other = [11,20,13]\n",
    "i_garg = [5,9,12,29,32,33,34]\n",
    "i_nazgul = [30]\n",
    "i_collage = [1,2,3,14,15,19]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Person+Sculpture at Rufford Abbey"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure(figsize=(10,8))\n",
    "for n, image_path in enumerate([getImages(imageswith_dfs['RuffordAbbey'],'Person','Sculpture')[i] # of 35 possible images\n",
    "                                for i in i_love + i_arch + i_other]):\n",
    "    plt.subplot(3,4,n+1)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "plt.subplots_adjust(wspace=0.05, hspace=0.01)\n",
    "# plt.savefig('5_rufford_person_sculpture.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure(figsize=(10,10))\n",
    "for n, image_path in enumerate([getImages(imageswith_dfs['RuffordAbbey'],'Person','Sculpture')[i] # of 35 possible images\n",
    "                                for i in [32, 26, 20, 13]]):\n",
    "    plt.subplot(2,2,n+1)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "plt.subplots_adjust(wspace=0.05, hspace=0.01)\n",
    "plt.savefig('5_rufford_person_sculpture_v2.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Bicycle+Castle, Person+Castle at Reculver Towers and Roman Fort"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for n, image_path in enumerate(getImages(imageswith_dfs['ReculverTowersandRomanFort'],'Bicycle','Castle')):\n",
    "    print(n)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "    plt.show()\n",
    "# Derived from the above 31 images:\n",
    "i_bike = [0,1,2,5,6,7,8,11,12,13,15,21,26,28,29,17,22,23,24,25,19,20]\n",
    "i_cyclist = [30,10,14,16,3,27,4]\n",
    "i_coll = [9,18]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure(figsize=(12,14))\n",
    "for n, image_path in enumerate([getImages(imageswith_dfs['ReculverTowersandRomanFort'],'Bicycle','Castle')[i] # of 31 possible images\n",
    "                              for i in i_bike[:16] + i_cyclist[:4]]):\n",
    "    plt.subplot(5,4,n+1)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "plt.subplots_adjust(wspace=0.005, hspace=0.05)\n",
    "plt.savefig('5_reculver_bicycle_castle.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure(figsize=(10,10))\n",
    "for n, image_path in enumerate([getImages(imageswith_dfs['ReculverTowersandRomanFort'],'Bicycle','Castle')[i] # of 31 possible images\n",
    "                              for i in [0, 1, 6, 7]]):\n",
    "    plt.subplot(2,2,n+1)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "plt.subplots_adjust(wspace=0.005, hspace=0.05)\n",
    "plt.savefig('5_reculver_bicycle_castle_v2.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(getImages(imageswith_dfs['ReculverTowersandRomanFort'],'Person','Castle'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for n, image_path in enumerate(getImages(imageswith_dfs['ReculverTowersandRomanFort'],'Person','Castle')[60:80]):\n",
    "    print(n)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len([0,1,4,8,15,18,23,24,27,28,30,40,41,51,54])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure(figsize=(12,14))\n",
    "for n, image_path in enumerate([getImages(imageswith_dfs['ReculverTowersandRomanFort'],'Person','Castle')[i] # of 138 possible images\n",
    "                              for i in [1,4,51,68,\n",
    "                                        74,15,27,41,\n",
    "                                        18,40,8,76,\n",
    "                                        28,30,77,83,\n",
    "                                        0,23,24,54]]):\n",
    "    plt.subplot(5,4,n+1)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "plt.subplots_adjust(wspace=0.005, hspace=0.05)\n",
    "plt.savefig('5_reculver_person_castle.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2x2 grid of four hand-picked Reculver 'Person' + 'Castle' images, saved to\n",
    "# 5_reculver_person_castle_v2.png.\n",
    "plt.figure(figsize=(10,10))\n",
    "# Query the candidate list once instead of once per selected index.\n",
    "reculver_person_castle_paths = getImages(imageswith_dfs['ReculverTowersandRomanFort'],'Person','Castle') # of 138 possible images\n",
    "for n, image_path in enumerate([reculver_person_castle_paths[i] for i in [1,4,0,23]]):\n",
    "    plt.subplot(2,2,n+1)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "plt.subplots_adjust(wspace=0.005, hspace=0.05)\n",
    "plt.savefig('5_reculver_person_castle_v2.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 5x4 grid of hand-picked 'Selfie' images across three sites, saved to 5_selfies.png.\n",
    "plt.figure(figsize=(12,14))\n",
    "# Run each getImages query once instead of once per selected index.\n",
    "reculver_selfie_paths = getImages(imageswith_dfs['ReculverTowersandRomanFort'],'Selfie','Castle') # of 12 possible\n",
    "bury_selfie_paths = getImages(imageswith_dfs['BuryStEdmundsAbbey'],'Selfie','Building') # of 8 possible\n",
    "rollright_selfie_sculpture_paths = getImages(imageswith_dfs['RollrightStones*'],'Selfie','Sculpture') # of 8 possible\n",
    "rollright_selfie_stone_paths = getImages(imageswith_dfs['RollrightStones*'],'Selfie','Stone*') # of 87 possible\n",
    "# Rows 1-3 each show one site/label pair; rows 4-5 both come from the 'Stone*' query.\n",
    "for n, image_path in enumerate([reculver_selfie_paths[i] for i in [0,2,6,7]]\n",
    "                               + [bury_selfie_paths[i] for i in [0,1,6,7]]\n",
    "                               + [rollright_selfie_sculpture_paths[i] for i in [0,1,5,7]]\n",
    "                               + [rollright_selfie_stone_paths[i] for i in [0,6,7,13,70,79,23,29]]):\n",
    "    plt.subplot(5,4,n+1)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "plt.subplots_adjust(wspace=0.05, hspace=0.05)\n",
    "plt.savefig('5_selfies.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Person+Building, and Person+Ruin (ruins go undetected, so those examples are hand-picked from the ``Person`` images), at Bury St Edmunds Abbey."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The first four index lists were picked by eye while paging through the 93 images of\n",
    "# getImages(imageswith_dfs['BuryStEdmundsAbbey'],'Building','Person') with a loop like:\n",
    "#     for n, image_path in enumerate(getImages(...)[50:100]):\n",
    "#         print(n); plt.imshow(mpimg.imread(image_path)); plt.axis('off'); plt.show()\n",
    "i_ent = [\n",
    "    17, 32, 41, 59, 60, 61, 63, 77,\n",
    "    88, 8, 13, 25, 30, 40, 79, 0,\n",
    "]  # posing with entrance\n",
    "i_abbey = [\n",
    "    2, 11, 21, 23, 27, 28, 45, 46, 49, 54, 55, 67, 68,\n",
    "    69, 70, 72, 76, 78, 84, 85, 6, 92, 7, 22, 1, 58,\n",
    "]  # posing with abbey\n",
    "# NOTE(review): the meaning of the `ic_` prefix is not recorded here -- TODO confirm.\n",
    "ic_ent = [18, 24, 29, 37, 53, 57, 62, 66, 71, 73, 82, 87, 89]\n",
    "ic_abbey = [4, 5, 19, 36, 56, 83]\n",
    "# Derived from 1474 images with 'Person'\n",
    "# NOTE(review): 24 is listed twice, so a grid built from i_ruin shows that image\n",
    "# twice -- confirm which index was intended.\n",
    "i_ruin = [11, 17, 23, 24, 27, 35, 46, 49, 24, 50, 55, 62]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 5x4 grid: the first 12 i_abbey picks followed by the first 8 i_ent picks, saved to\n",
    "# 5_bury_person_building.png.\n",
    "plt.figure(figsize=(12,14))\n",
    "# Query the candidate list once instead of once per selected index.\n",
    "bury_person_building_paths = getImages(imageswith_dfs['BuryStEdmundsAbbey'],'Person','Building') # of 93 possible\n",
    "for n, image_path in enumerate([bury_person_building_paths[i]\n",
    "                                for i in i_abbey[:12] + i_ent[:8]]):\n",
    "    plt.subplot(5,4,n+1)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "plt.subplots_adjust(wspace=0.005, hspace=0.05)\n",
    "plt.savefig('5_bury_person_building.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3x4 grid of the i_ruin picks from the Bury St Edmunds 'Person' images, saved to\n",
    "# 5_bury_ruins.png.\n",
    "plt.figure(figsize=(12,8))\n",
    "# Query the candidate list once instead of once per selected index.\n",
    "bury_person_paths = getImages(imageswith_dfs['BuryStEdmundsAbbey'],'Person','Person') # of 1474 possible\n",
    "for n, image_path in enumerate([bury_person_paths[i]\n",
    "                                for i in i_ruin]):\n",
    "    plt.subplot(3,4,n+1)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "plt.subplots_adjust(wspace=0.02, hspace=0.04)\n",
    "plt.savefig('5_bury_ruins.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2x2 grid of four hand-picked Bury St Edmunds 'Person' images, saved to\n",
    "# 5_bury_ruins_v2.png.\n",
    "plt.figure(figsize=(10,10))\n",
    "# Query the candidate list once instead of once per selected index.\n",
    "bury_person_paths = getImages(imageswith_dfs['BuryStEdmundsAbbey'],'Person','Person') # of 1474 possible\n",
    "for n, image_path in enumerate([bury_person_paths[i]\n",
    "                                for i in [11,17,27,35]]):\n",
    "    plt.subplot(2,2,n+1)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "plt.subplots_adjust(wspace=0.02, hspace=0.04)\n",
    "plt.savefig('5_bury_ruins_v2.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Size of the Rufford Abbey 'Person' + 'Building' candidate pool.\n",
    "len(\n",
    "    getImages(imageswith_dfs['RuffordAbbey'], 'Person', 'Building')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3x4 grid: four 'Person' + 'Building' picks followed by eight 'Person' + 'House'\n",
    "# picks at Rufford Abbey, saved to 5_rufford_person_building_house.png.\n",
    "plt.figure(figsize=(14,12))\n",
    "# Run each getImages query once instead of once per selected index.\n",
    "rufford_person_building_paths = getImages(imageswith_dfs['RuffordAbbey'],'Person','Building') # of 24 possible\n",
    "rufford_person_house_paths = getImages(imageswith_dfs['RuffordAbbey'],'Person','House') # of 20 possible\n",
    "for n, image_path in enumerate([rufford_person_building_paths[i]\n",
    "                                for i in [1,2,8,15]]+\n",
    "                               [rufford_person_house_paths[i]\n",
    "                                for i in [5,6,1,2,0,3,4,11]]):\n",
    "    plt.subplot(3,4,n+1)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "plt.subplots_adjust(wspace=0.02, hspace=0.0)\n",
    "plt.savefig('5_rufford_person_building_house.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2x2 grid: two 'Person' + 'Building' picks and two 'Person' + 'House' picks at\n",
    "# Rufford Abbey, saved to 5_rufford_person_building_house_v2.png.\n",
    "plt.figure(figsize=(10,10))\n",
    "# Run each getImages query once instead of once per selected index.\n",
    "rufford_person_building_paths = getImages(imageswith_dfs['RuffordAbbey'],'Person','Building') # of 24 possible\n",
    "rufford_person_house_paths = getImages(imageswith_dfs['RuffordAbbey'],'Person','House') # of 20 possible\n",
    "for n, image_path in enumerate([rufford_person_building_paths[i]\n",
    "                                for i in [1,2]]+\n",
    "                               [rufford_person_house_paths[i]\n",
    "                                for i in [4,11]]):\n",
    "    plt.subplot(2,2,n+1)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "plt.subplots_adjust(wspace=0.2, hspace=0.0)\n",
    "plt.savefig('5_rufford_person_building_house_v2.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Dogs everywhere"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 5x4 grid with one row of four 'Dog' images per site (displayed only; the save is\n",
    "# disabled). The fourth getImages argument presumably excludes images labelled\n",
    "# 'Selfie' -- TODO confirm against getImages.\n",
    "plt.figure(figsize=(12,14))\n",
    "# Query the two hand-indexed lists once instead of once per selected index.\n",
    "bury_dog_paths = getImages(imageswith_dfs['BuryStEdmundsAbbey'],'Dog','Dog','Selfie')\n",
    "# NOTE(review): the other cells in this section key imageswith_dfs with 'RollrightStones*';\n",
    "# confirm the un-starred key is intended here (the indices were picked against it).\n",
    "rollright_dog_paths = getImages(imageswith_dfs['RollrightStones'],'Dog','Dog','Selfie')\n",
    "for n, image_path in enumerate(getImages(imageswith_dfs['CastleriggStoneCircle'],'Dog','Dog','Selfie')[:4]\n",
    "                              + getImages(imageswith_dfs['RuffordAbbey'],'Dog','Dog','Selfie')[:4]\n",
    "                              + getImages(imageswith_dfs['ReculverTowersandRomanFort'],'Dog','Dog','Selfie')[3:7]\n",
    "                              + [bury_dog_paths[i] for i in [1,2,4,5]]\n",
    "                              + [rollright_dog_paths[i] for i in [26,40,28,43]]):\n",
    "    plt.subplot(5,4,n+1)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "plt.subplots_adjust(wspace=0.0001, hspace=0.05)\n",
    "# plt.savefig('5_dogs.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Architectural qualities of Rufford Abbey and Bury St Edmunds Abbey, retrieved using ``Building`` and ``House`` in the absence of ``Person``."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Size of the Rufford Abbey 'Building' pool queried with 'Person' as the fourth argument.\n",
    "len(\n",
    "    getImages(imageswith_dfs['RuffordAbbey'], 'Building', 'Building', 'Person')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 4x4 grid: eight 'Building' picks then eight 'House' picks at Bury St Edmunds Abbey,\n",
    "# saved to 5_bury_building_house.png.\n",
    "plt.figure(figsize=(12,12))\n",
    "# Run each getImages query once instead of once per selected index.\n",
    "bury_building_paths = getImages(imageswith_dfs['BuryStEdmundsAbbey'],'Building','Building','Person') # of 744 possible\n",
    "bury_house_paths = getImages(imageswith_dfs['BuryStEdmundsAbbey'],'House','House','Person') # of 184 possible\n",
    "for n, image_path in enumerate([bury_building_paths[i] for i in [13,15,17,18,19,20,23,26]]+\n",
    "                              [bury_house_paths[i] for i in [20,22,23,24,25,29,33,38]]):\n",
    "    plt.subplot(4,4,n+1)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "plt.subplots_adjust(wspace=0.05, hspace=0.05)\n",
    "plt.savefig('5_bury_building_house.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2x2 grid of four hand-picked Bury St Edmunds 'Building' images, saved to\n",
    "# 5_bury_building_v2.png.\n",
    "plt.figure(figsize=(10,10))\n",
    "# Query the candidate list once instead of once per selected index.\n",
    "bury_building_paths = getImages(imageswith_dfs['BuryStEdmundsAbbey'],'Building','Building','Person') # of 744 possible\n",
    "for n, image_path in enumerate([bury_building_paths[i] for i in [17,18,23,26]]):\n",
    "    plt.subplot(2,2,n+1)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "plt.subplots_adjust(wspace=0.05, hspace=0.05)\n",
    "plt.savefig('5_bury_building_v2.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 4x4 grid of sixteen hand-picked Rufford Abbey 'Building' images, saved to\n",
    "# 5_rufford_building.png.\n",
    "plt.figure(figsize=(12,12))\n",
    "# Query the candidate list once instead of once per selected index.\n",
    "rufford_building_paths = getImages(imageswith_dfs['RuffordAbbey'],'Building','Building','Person')  # of 231 possible\n",
    "for n, image_path in enumerate([rufford_building_paths[i] for i in [1,2,4,5,6,9,10,11,13,15,16,17,18,19,20,21]]):\n",
    "    plt.subplot(4,4,n+1)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "plt.subplots_adjust(wspace=0.05, hspace=0.03)\n",
    "plt.savefig('5_rufford_building.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2x2 grid of four hand-picked Rufford Abbey 'Building' images, saved to\n",
    "# 5_rufford_building_v2.png.\n",
    "plt.figure(figsize=(10,10))\n",
    "# Query the candidate list once instead of once per selected index.\n",
    "rufford_building_paths = getImages(imageswith_dfs['RuffordAbbey'],'Building','Building','Person')  # of 231 possible\n",
    "for n, image_path in enumerate([rufford_building_paths[i] for i in [4,5,10,11]]):\n",
    "    plt.subplot(2,2,n+1)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "plt.subplots_adjust(wspace=0.05, hspace=0.03)\n",
    "plt.savefig('5_rufford_building_v2.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3x4 grid of twelve hand-picked Rufford Abbey 'House' images, saved to\n",
    "# 5_rufford_house.png.\n",
    "plt.figure(figsize=(14,12))\n",
    "# Query the candidate list once instead of once per selected index.\n",
    "rufford_house_paths = getImages(imageswith_dfs['RuffordAbbey'],'House','House','Person') # of 270 possible\n",
    "for n, image_path in enumerate([rufford_house_paths[i] for i in [0,2,3,7,9,11,13,14,15,17,21,22]]):\n",
    "    plt.subplot(3,4,n+1)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "plt.subplots_adjust(wspace=0.05, hspace=0.05)\n",
    "plt.savefig('5_rufford_house.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Rollright Stones*: Person+Stone, Selfie+Stone, Person+Sculpture"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Size of the Rollright Stones* 'Person' candidate pool.\n",
    "len(\n",
    "    getImages(imageswith_dfs['RollrightStones*'], 'Person', 'Person')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Size of the Rollright Stones* 'Stone*' + 'Selfie' candidate pool.\n",
    "len(\n",
    "    getImages(imageswith_dfs['RollrightStones*'], 'Stone*', 'Selfie')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3x4 grid of hand-picked Rollright Stones* 'Person' + 'Stone*' images, saved to\n",
    "# 5_rollright_person_stone.png.\n",
    "plt.figure(figsize=(14,12))\n",
    "# Query the candidate list once instead of once per selected index.\n",
    "rollright_person_stone_paths = getImages(imageswith_dfs['RollrightStones*'],'Person','Stone*') # of 582 possible\n",
    "# Indices are written row by row, matching their position in the grid.\n",
    "for n, image_path in enumerate([rollright_person_stone_paths[i] for i in [0,16,101,38,\n",
    "                                                                          36,115,13,39,\n",
    "                                                                          35,104,3,119]]):\n",
    "    plt.subplot(3,4,n+1)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "plt.subplots_adjust(wspace=0.05, hspace=0.0)\n",
    "plt.savefig('5_rollright_person_stone.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Rollright Stones with Person, Selfie and Sculpture."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print the sizes of the Rollright Stones* 'Sculpture' pools paired with 'Person'\n",
    "# and then with 'Selfie'.\n",
    "for companion in ('Person', 'Selfie'):\n",
    "    print(len(getImages(imageswith_dfs['RollrightStones*'], 'Sculpture', companion)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Page through the Rollright Stones* 'Sculpture' + 'Person' candidates from index 40\n",
    "# onwards, one image per figure, for hand-picking.\n",
    "# enumerate starts at 40 so the printed number is the image's index in the full\n",
    "# getImages list, not its offset within the slice.\n",
    "for n, image_path in enumerate(getImages(imageswith_dfs['RollrightStones*'],'Sculpture','Person')[40:], start=40):\n",
    "    print(n)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3x4 grid of hand-picked Rollright Stones* 'Person' + 'Sculpture' images, saved to\n",
    "# 5_rollright_person_sculpture.png.\n",
    "plt.figure(figsize=(14,12))\n",
    "# Query the candidate list once instead of once per selected index.\n",
    "rollright_person_sculpture_paths = getImages(imageswith_dfs['RollrightStones*'],'Person','Sculpture') # of 61 possible\n",
    "# Indices are written row by row, matching their position in the grid.\n",
    "for n, image_path in enumerate([rollright_person_sculpture_paths[i] for i in [48,38,23,42,\n",
    "                                                                              12,16,57,9,\n",
    "                                                                              45,20,4,44]]):\n",
    "    plt.subplot(3,4,n+1)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "plt.subplots_adjust(wspace=0.02, hspace=0.0)\n",
    "plt.savefig('5_rollright_person_sculpture.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2x2 grid of four hand-picked Rollright Stones* 'Person' + 'Sculpture' images, saved\n",
    "# to 5_rollright_person_sculpture_v2.png.\n",
    "plt.figure(figsize=(10,10))\n",
    "# Query the candidate list once instead of once per selected index.\n",
    "rollright_person_sculpture_paths = getImages(imageswith_dfs['RollrightStones*'],'Person','Sculpture') # of 61 possible\n",
    "for n, image_path in enumerate([rollright_person_sculpture_paths[i] for i in [48,38,12,16]]):\n",
    "    plt.subplot(2,2,n+1)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "plt.subplots_adjust(wspace=0.02, hspace=0.0)\n",
    "plt.savefig('5_rollright_person_sculpture_v2.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Size of the Reculver 'Dog' + 'Castle' candidate pool.\n",
    "len(\n",
    "    getImages(imageswith_dfs['ReculverTowersandRomanFort'], 'Dog', 'Castle')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Preview candidates 20 and 21 of the Reculver 'Dog' + 'Castle' list in the first two\n",
    "# slots of a 5x4 grid (display only, nothing is saved).\n",
    "plt.figure(figsize=(14, 12))\n",
    "for n, image_path in enumerate(\n",
    "    getImages(imageswith_dfs['ReculverTowersandRomanFort'], 'Dog', 'Castle')[20:22]  # of 22 possible\n",
    "):\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.subplot(5, 4, n + 1)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "plt.subplots_adjust(wspace=0.05, hspace=0.05)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 5x4 grid of hand-picked 'Dog' images: eight from Reculver, four from Bury St Edmunds\n",
    "# Abbey and eight from Rollright Stones*, saved to 5_dogs.png.\n",
    "plt.figure(figsize=(12,14))\n",
    "# Run each getImages query once instead of once per selected index.\n",
    "reculver_dog_castle_paths = getImages(imageswith_dfs['ReculverTowersandRomanFort'],'Dog','Castle') # of 22 possible\n",
    "bury_dog_building_paths = getImages(imageswith_dfs['BuryStEdmundsAbbey'],'Dog','Building') # of 6 possible\n",
    "rollright_dog_stone_paths = getImages(imageswith_dfs['RollrightStones*'],'Dog','Stone*') # of 58 possible\n",
    "for n, image_path in enumerate([reculver_dog_castle_paths[i] for i in [2,4,5,7,10,13,18,19]] +\n",
    "                               [bury_dog_building_paths[i] for i in [0,2,4,5]] +\n",
    "                               [rollright_dog_stone_paths[i] for i in [2,4,8,10,11,13,23,48]]):\n",
    "    plt.subplot(5,4,n+1)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "plt.subplots_adjust(wspace=0.001, hspace=0.05)\n",
    "plt.savefig('5_dogs.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2x2 grid of four hand-picked Reculver 'Dog' + 'Castle' images, saved to 5_dogs_v2.png.\n",
    "plt.figure(figsize=(10,10))\n",
    "# Query the candidate list once instead of once per selected index.\n",
    "reculver_dog_castle_paths = getImages(imageswith_dfs['ReculverTowersandRomanFort'],'Dog','Castle') # of 22 possible\n",
    "for n, image_path in enumerate([reculver_dog_castle_paths[i] for i in [5,7,18,19]]):\n",
    "    plt.subplot(2,2,n+1)\n",
    "    img = mpimg.imread(image_path)\n",
    "    plt.imshow(img)\n",
    "    plt.axis('off')\n",
    "plt.subplots_adjust(wspace=0.001, hspace=0.05)\n",
    "plt.savefig('5_dogs_v2.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Rollright Stones"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Draw each selected Rollright Stones detection result on its own figure, printing\n",
    "# the row position and its scores first.\n",
    "# rollright_plot maps a display slot to a positional row of output_dfs['RollrightStones'];\n",
    "# only the row positions are used in this cell.\n",
    "rollright_plot = {0:1068,1:9,2:45,3:66,4:24,5:1154,6:2152,7:1156,8:1169,9:2054,10:3150,11:3163}\n",
    "for v in rollright_plot.values():\n",
    "    plt.figure(figsize=(10,12))\n",
    "    row = output_dfs['RollrightStones'].iloc[v]\n",
    "    print(v)\n",
    "    print_scores(row, category_index)\n",
    "    # Close the image file as soon as load_image_into_numpy_array has read it.\n",
    "    with Image.open(row['image_path']) as image:\n",
    "        image_np = load_image_into_numpy_array(image)\n",
    "    # Boxes and labels are drawn onto image_np before it is shown.\n",
    "    vis_util.visualize_boxes_and_labels_on_image_array(\n",
    "        image_np,\n",
    "        row['detection_boxes'],\n",
    "        row['detection_classes'],\n",
    "        row['detection_scores'],\n",
    "        category_index,\n",
    "        instance_masks=row.get('detection_masks'),\n",
    "        use_normalized_coordinates=True,\n",
    "        line_thickness=8)\n",
    "    plt.axis('off')\n",
    "    plt.imshow(image_np)\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 4x3 grid of the selected Rollright Stones detection results, saved to 4_rollright.png.\n",
    "# rollright_plot maps a subplot slot (key) to a positional row of\n",
    "# output_dfs['RollrightStones'] (value).\n",
    "rollright_plot = {0:1068,1:9,2:45,3:66,4:24,5:1154,6:2152,7:1156,8:1169,9:2054,10:3150,11:3163}\n",
    "plt.figure(figsize=(12,16))\n",
    "for k, v in rollright_plot.items():\n",
    "    row = output_dfs['RollrightStones'].iloc[v]\n",
    "    plt.subplot(4,3,k+1)\n",
    "    # Close the image file as soon as load_image_into_numpy_array has read it.\n",
    "    with Image.open(row['image_path']) as image:\n",
    "        image_np = load_image_into_numpy_array(image)\n",
    "    # Boxes and labels are drawn onto image_np before it is shown.\n",
    "    vis_util.visualize_boxes_and_labels_on_image_array(\n",
    "        image_np,\n",
    "        row['detection_boxes'],\n",
    "        row['detection_classes'],\n",
    "        row['detection_scores'],\n",
    "        category_index,\n",
    "        instance_masks=row.get('detection_masks'),\n",
    "        use_normalized_coordinates=True,\n",
    "        line_thickness=8)\n",
    "    plt.axis('off')\n",
    "    plt.imshow(image_np)\n",
    "plt.tight_layout(pad=0)\n",
    "plt.savefig('4_rollright.png',bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the Reculver output rows whose image_path is listed in testpaths;\n",
    "# .copy() gives a frame independent of output_dfs.\n",
    "test_dfs2 = (\n",
    "    output_dfs['ReculverTowersandRomanFort']\n",
    "    .loc[lambda df: df['image_path'].isin(testpaths)]\n",
    "    .copy()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show the first ten rows of test_dfs2 with their detections drawn on the image,\n",
    "# printing each row's index label and scores first.\n",
    "for i, row in test_dfs2[:10].iterrows():\n",
    "    print(i)\n",
    "\n",
    "    # The array-based representation of the image is what the boxes and labels are\n",
    "    # drawn onto; close the image file as soon as it has been read.\n",
    "    with Image.open(row['image_path']) as image:\n",
    "        image_np = load_image_into_numpy_array(image)\n",
    "\n",
    "    # Print scores\n",
    "    print_scores(row, category_index)\n",
    "\n",
    "    # Boxes are passed through as stored and interpreted as normalised coordinates.\n",
    "    vis_util.visualize_boxes_and_labels_on_image_array(\n",
    "        image_np,\n",
    "        row['detection_boxes'],\n",
    "        row['detection_classes'],\n",
    "        row['detection_scores'],\n",
    "        category_index,\n",
    "        instance_masks=row.get('detection_masks'),\n",
    "        use_normalized_coordinates=True,\n",
    "        line_thickness=8)\n",
    "\n",
    "    plt.figure(figsize=IMAGE_SIZE)\n",
    "    plt.axis('off')\n",
    "    plt.imshow(image_np)\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "collapsed_sections": [],
   "name": "TFobjectDetection_v2.0.ipynb",
   "provenance": [],
   "toc_visible": true,
   "version": "0.3.2"
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  },
  "toc-autonumbering": false
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
